# This script generates synthetic articles for robustness checks across different languages.
# It uses GPT-4o to generate the articles.
# It generates 50 articles for each language and each type of criticism.
# It produces a CSV file with the articles.
# NOTE: This script will not reproduce the paper's results exactly,
# because the language model's output is stochastic.
import csv
import os
import time

import openai
import requests
from openai import OpenAI

# Initialize the OpenAI client.
# The key is taken from the OPENAI_API_KEY environment variable so that it does
# not have to be written into (and possibly committed with) this file.
# Alternatively, replace the empty fallback string with your own API key.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", ""))

# Languages in which the synthetic articles are generated.
languages = ["English", "French", "Spanish", "Russian", "Mandarin", "Arabic", "Japanese", "Korean"]

# Output location of the generated articles.
csv_file_path = "data/synthetic/synthetic_articles_all_gpt-4o.csv"
csv_dir = os.path.dirname(csv_file_path)

# Column order of the output CSV.
fieldnames = ['Language', 'Criticism', 'Index', 'Article']

# Create the output directory if needed. exist_ok avoids the check-then-create
# race; the guard covers a path without a directory component, for which
# os.path.dirname returns '' (os.makedirs('') would raise).
if csv_dir:
    os.makedirs(csv_dir, exist_ok=True)

# Write the header when the file is missing *or* empty. An empty file can be
# left behind by a run that died before the header was written; appending to it
# without a header would yield a CSV with no column names.
if not os.path.exists(csv_file_path) or os.path.getsize(csv_file_path) == 0:
    with open(csv_file_path, 'w', encoding='utf-8', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

# Number of articles generated for every (language, criticism type) pair.
ARTICLES_PER_COMBINATION = 50
# Upper bound on attempts for a single article, so that a persistent failure
# stops the script instead of looping forever.
MAX_ATTEMPTS = 10
# Pause between attempts after a connection / rate-limit / server error.
RETRY_DELAY_SECONDS = 60


def _generate_article(lang, crit_type):
    """Request one article from GPT-4o and return its stripped text.

    Args:
        lang: Name of the language the article should be written in.
        crit_type: 'Critical' for an article critical of POLITFIG; any other
            value requests a non-critical article.

    Returns:
        The generated article text with surrounding whitespace removed.

    Raises:
        RuntimeError: If no article could be obtained within MAX_ATTEMPTS.
        openai.APIError: Immediately, for errors that a retry cannot fix
            (e.g. authentication or bad-request errors).
    """
    negation = '' if crit_type == 'Critical' else 'not '
    messages = [
        {"role": "system", "content": "You are a political journalist."},
        {"role": "user", "content": f"Write a 500-word political news article in {lang} that is {negation}critical of the leader POLITFIG. The article does not require a title. Do not translate the word POLITFIG."},
    ]

    for attempt in range(1, MAX_ATTEMPTS + 1):
        is_last_attempt = attempt == MAX_ATTEMPTS
        try:
            # NOTE(review): max_tokens=500 is probably too small for a 500-word
            # article, so outputs are likely truncated. Left unchanged so the
            # generated data stays comparable to earlier runs; confirm before changing.
            response = client.chat.completions.create(model="gpt-4o",
                                                      messages=messages,
                                                      max_tokens=500,
                                                      temperature=0.7)
        except openai.APITimeoutError:
            # Must be caught before APIConnectionError, which is its base class.
            print("Read timeout, retrying...")
            continue
        except (openai.APIConnectionError,
                openai.RateLimitError,
                openai.InternalServerError) as e:
            # Transient failures: wait and try again. Any other error
            # (authentication, bad request, ...) propagates at once.
            if not is_last_attempt:
                print(f"Request exception {e}, sleeping for {RETRY_DELAY_SECONDS} seconds...")
                time.sleep(RETRY_DELAY_SECONDS)
            continue

        content = response.choices[0].message.content
        if content is None:
            # The API can return a message without text content; treat it as a
            # failed attempt instead of crashing on None.strip().
            print("Empty response, retrying...")
            continue
        return content.strip()

    raise RuntimeError(
        f"Could not generate a '{crit_type}' article in {lang} "
        f"after {MAX_ATTEMPTS} attempts"
    )


# Open the file in append mode for adding new data
with open(csv_file_path, 'a', encoding='utf-8', newline='') as csvfile:
    fieldnames = ['Language', 'Criticism', 'Index', 'Article']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    # Running index over all articles written by this run.
    # NOTE: it restarts at 0 on every run while the file is opened in append
    # mode, so re-running the script appends rows with repeated Index values.
    global_index = 0

    for lang in languages:
        for crit_type in ['Critical', 'Not Critical']:
            for _ in range(ARTICLES_PER_COMBINATION):
                article = _generate_article(lang, crit_type)
                global_index += 1
                # The write sits outside the retry logic on purpose: a failing
                # write must surface, not trigger another (billed) API call.
                writer.writerow({
                    'Language': lang,
                    'Criticism': crit_type,
                    'Index': global_index,
                    'Article': article
                })
                # Flush after each row so finished articles survive a crash
                csvfile.flush()
                # Optionally, force OS-level flush:
                # os.fsync(csvfile.fileno())